from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
#from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from lxml import etree
import time
#from pyvirtualdisplay import Display

#display = Display(visible=0,size=(800,600))
#display.start()
# PhantomJS is outdated and using it produces warning messages;
# the two lines below suppress those warnings.
from warnings import filterwarnings
filterwarnings('ignore')

# Scrape the Sohu rolling-news page: load it in PhantomJS, click the
# "next page" link, then print the text and href of every <li>/<a> entry
# found inside the element with id "newsList".
#
# The driver is created inside the inner try and always shut down in the
# `finally` clause, so the PhantomJS process is not left running when a
# step fails (navigation error, missing link, wait timeout, parse error).
# Any exception, including one raised by quit() itself, is still reported
# by the outer handler exactly as before: it is printed, not re-raised.
driver = None
try:
    try:
        # executable_path is the location of the phantomjs executable.
        driver = webdriver.PhantomJS(executable_path='/opt/phantomjs/bin/phantomjs')
        driver.get("http://news.sohu.com/scroll/")
        # Fixed pause kept from the original script.
        # NOTE(review): presumably gives the page's scripts time to build
        # the list and the pager link - confirm before replacing it with
        # an explicit wait.
        time.sleep(2)
        print('---------------第一页：')

        # Selenium finds elements in the WebDriver DOM through locator
        # strategies exposed on `By`: id, class name, css selector,
        # link text, name, tag name, xpath, and so on.
        next_link = driver.find_element(By.LINK_TEXT, "下一页")
        next_link.click()

        # Wait up to 10 seconds for the list container to be present.
        # NOTE(review): if "newsList" already exists before the click this
        # wait returns immediately and may not prove that the second page
        # has finished loading - verify against the live page.
        element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "newsList")))
        print('---------------第二页：')

        # Parse the container's inner HTML with lxml and print one
        # "text href" line per link.
        html = element.get_attribute("innerHTML")
        dom = etree.HTML(html)
        for node_a in dom.xpath('//li/a'):
            href = node_a.get('href')
            text = node_a.text
            print(text, href)
    finally:
        # Release the browser process on success and on failure alike.
        if driver is not None:
            driver.quit()
except Exception as e:
    # Top-level boundary of the script: report the failure and exit quietly.
    print(e)
#display.stop()
